Paper: using HFD to get a good understanding of the relationships between the Covid-19 measurements and the other covariates. we did this by breaking the signal down into three components. (1) the trend of the data. this is created using a loess smoothing. (2) the first difference of the trend. (3) the detrended data. this captures the noise processes. furthermore there is two view of the data. We look at the normal data and the log of data. The log of the data better captures the shape of Wastewater. at the bottom of the document contain plots showing a comprehensive view of the problem. The main goal of this document is to show the main takeaways of those plots in an easier way to see. The main takeaways are, 1) the covariates don’t strongly relate to the covid signal. 2) Crp and HF183 are strongly related. 3)
library(DSIWastewater)
library(dplyr)
library(ggplot2)
library(tidyr)
data("HFGWaste_data", package = "DSIWastewater")
#crazy agressive method
hfg_outlier_detection <- function(small_vec){
upbound_vec <- quantile(small_vec, probs = .7, na.rm = TRUE)
retVec = ifelse(small_vec > upbound_vec,
NA, small_vec)
retVec = ifelse(retVec == 0, NA, retVec)
return(retVec)
}
Pop_DF <- data.frame(
site = c("Hudson","Kenosha","Platteville","Madison","Merrill","Plymouth","River Falls","Sun Prairie","Wausau","Oshkosh","Wausau"),
pop = c(19680,122000,14000,380000,10000,9000,16000,34926,42000,67000,42000)
)
hfg_waste_filt_df <- HFGWaste_data%>%
select(site, date, Filter, Well, N1, N2, PMMOV, BCoV, HF183, CrP)%>%
group_by(date,site)%>%
mutate(across(N1:CrP, hfg_outlier_detection))%>%
left_join(Pop_DF)
trend_df <- hfg_waste_filt_df%>%
group_by(site)%>%
group_split()%>%
lapply(loessSmoothMod, InVar = "N1", OutVar = "N1_Trend")%>%
lapply(loessSmoothMod, InVar = "N2", OutVar = "N2_Trend")%>%
lapply(loessSmoothMod, InVar = "PMMOV", OutVar = "PMMOV_Trend")%>%
lapply(loessSmoothMod, InVar = "HF183", OutVar = "HF183_Trend")%>%
lapply(loessSmoothMod, InVar = "CrP", OutVar = "CrP_Trend")%>%
bind_rows()%>%
mutate(
N1_Diff = N1_Trend - N1,
N2_Diff = N2_Trend - N2,
PMMOV_Diff = PMMOV_Trend - PMMOV,
HF183_Diff = HF183_Trend - HF183,
CrP_Diff = CrP_Trend - CrP
)%>%
select(date, site, pop, N1_Trend:CrP_Diff)
library(ggcorrplot)
trend_df%>%
mutate(date = as.numeric(date))%>%
select(date, pop:CrP_Diff)%>%
cor(use = "complete.obs")%>%
ggcorrplot()
library(GGally)
trend_df%>%
ggpairs(aes(colour = site, alpha = 0.4))
log_trend_df <- hfg_waste_filt_df%>%
mutate(log_N1 = log(N1),
log_N2 = log(N2),
log_PMMOV = log(PMMOV),
log_HF183 = log(HF183),
log_CrP = log(CrP))%>%
group_by(site)%>%
group_split()%>%
lapply(loessSmoothMod, InVar = "log_N1", OutVar = "log_N1_Trend")%>%
lapply(loessSmoothMod, InVar = "log_N2", OutVar = "log_N2_Trend")%>%
lapply(loessSmoothMod, InVar = "log_PMMOV", OutVar = "log_PMMOV_Trend")%>%
lapply(loessSmoothMod, InVar = "log_HF183", OutVar = "log_HF183_Trend")%>%
lapply(loessSmoothMod, InVar = "log_CrP", OutVar = "log_CrP_Trend")%>%
bind_rows()%>%
mutate(
N1_Diff = log_N1_Trend - log_N1,
N2_Diff = log_N2_Trend - log_N2,
PMMOV_Diff = log_PMMOV_Trend - log_PMMOV,
HF183_Diff = log_HF183_Trend - log_HF183,
CrP_Diff = log_CrP_Trend - log_CrP)%>%
select(site, date, pop, log_N1_Trend:CrP_Diff)
log_trend_df%>%
select(date, pop, log_N1_Trend:CrP_Diff)%>%
mutate(date = as.numeric(date))%>%
cor(use = "complete.obs")%>%
ggcorrplot()
library(GGally)
log_trend_df%>%
ggpairs(aes(colour = site, alpha = 0.4))